# This code replicates Figure 11a
rm(list=ls())
setwd("")
library(foreign)
library(multiwayvcov)
library(ggplot2)
library(lme4)
library(lmtest)
library(multiwayvcov)
library(plyr)
options(scipen = 999)
data <- read.dta("data.dta")

# Set seed, number of simulations and number of controls to be added
set.seed(290)
sim <- 1000
n_controls <- 10

# The following vector includes all potential controls one could plausibly include
# Note that we excluded covariates that do not exhibit variation
# Note also that this exercise does not work when estimates are infinitely large 

controls <- c("distance_cusco", "distance_cusco_straight", "elevationfeet", "lon", "lat", "school_1992", "hospital_1992", "post_station_1992", "football_field_1992", "football_field_1992_minutes", "activity_shining_path", "total_pop", "male_census", "female_census", "x1_below", "x1_4", "x5_14", "x15_64", "x65plus", "natives_census", "migrants_census", "foreigners", "handycap", "blind", "mental_issues", "polio", "underdeveloped_extremities", "overdeveloped_extremities", "other_handycaps", "analphabets_total", "analphabets_male", "analphabets_female", "no_educ", "preschool", "primary_educ", "secondary_educ", "higher_educ", "workers6_14", "workers_15plus", "workers_15plus_employed", "workers_15plus_unemployed", "occ_agriculture", "occ_manu_mine_constr", "occ_retail", "occ_hawker", "occ_other_services", "occ_other", "cat_occ_wage_earner", "cat_occ_ind", "cat_occ_employer", "cat_occ_fam", "cat_occ_household", "econ_primary", "econ_secondary", "econ_tertiary", "living_together", "married_census", "alone", "other", "head_household", "head_household_male", "head_household_female", "average_children_per_mother", "women_with_more_4_children", "single_mothers", "single_mothers_12_19", "single_mothers_20_29", "single_mothers_30_49", "young_mothers", "flats_with_unoccupied_homes", "households", "average_household_size", "ind_house", "makeshift", "other_1", "ownership_own", "ownership_rental", "ownership_occupied", "ownership_other", "walls_cement", "walls_quincho", "walls_stone", "walls_wood", "walls_mat", "walls_other", "roof_concrete", "roof_corrugated_steel", "roof_mats", "roof_straw", "roof_other", "water_private", "water_public_well", "water_tankwagon", "water_other", "sanitary_connected_grid", "sanitary_privy_pit", "sanitory_other", "sanitory_no", "flats_with_electricity", "flats_no_electricity", "flats_with_1_room", "households_only_dormitories", "households_share_toilet", "households_with_commerical_roms", "no_domestic_appliances", "only_radio", "radio_tv", "sewing_machine", "fridge", "tricycle", "with_4plus_appliances", "age", "male", "internet")

## clustered standard errors
coefSe <- lapply(1:n_controls, function(x) { 

 replicate(sim, {

  sample <- paste0(sample(controls, x), "")
  treats <- c("treatment_adventist", "treatment_mixed", "treatment_peruana", "treatment_maranatha")
  vars <- c(treats, sample)
  formula <- paste0("exp1_leave", " ~ ", paste(vars, collapse = "+"))
  mod <- lm(formula, data=data)
  modCl <- coeftest(mod, vcov=function(x) cluster.vcov(x, data$village))
  modCl[2:5,1:2]
 
 })
  
})

ests <- lapply(coefSe, function(x) alply(x, 3))
estsDf <- lapply(ests, function(x) do.call(rbind, x))
treat <- rownames(estsDf[[1]])
estsDf  <- lapply(estsDf, data.frame)
estsDf <- lapply(estsDf, function(x) {x$treat <- treat; x})

estsMean <- lapply(estsDf, function(x) aggregate(x[,1:2], list(x$treat), function(y) mean(y, na.rm = T)))
estsMean <- lapply(estsMean)

estsFin <- do.call(rbind, estsMean)
estsFin$n_c <- rep(1:n_controls, each=4)
colnames(estsFin)[1:3] <- c("treat", "coef", "se")
estsFin$Treatment <- ifelse(estsFin$treat == "treatment_adventist", "Adventist",
                            ifelse(estsFin$treat == "treatment_maranatha", "Maranatha",
                                   ifelse(estsFin$treat == "treatment_mixed", "Mixed",
                                          ifelse(estsFin$treat == "treatment_peruana", "Peruana", NA))))

p <- ggplot(estsFin, aes(x = factor(n_c), y = coef)) + 
  geom_point(aes(color = Treatment), position = position_dodge(width = 0.5)) +
  geom_errorbar(aes(ymax = coef + se, ymin = coef - se, color = Treatment), width = 0, position = position_dodge(width = 0.5)) +
  xlab("Number of Controls") +
  ylab("Effect") +
  scale_y_continuous(breaks = c(-0.4, -0.30, -0.20, -0.10, 0, 0.10, 0.20)) +
  theme_bw() + 
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  geom_hline(yintercept = 0, linetype = "dashed")
  
p

ggsave("clusteredSE.pdf", p, width =7, height = 5)

